Making Backyards Affordable for All: A YIMBY Analysis

Author

Matthew Rivera

Published

October 23, 2025

Introduction

Housing affordability remains one of the most pressing challenges facing American cities today. This analysis examines metropolitan areas across the United States to identify “YIMBY” (Yes In My Backyard) success stories—cities that have effectively addressed housing affordability through permissive building policies. Using data from the US Census Bureau and Bureau of Labor Statistics, we develop metrics to measure rent burden and housing growth, ultimately identifying which cities have made meaningful progress in making housing more affordable.

Data Acquisition

Task 1: Data Import

Code
# First time only - install key
tidycensus::census_api_key("c71b785213dc9e92a64a6453a1dc1bf0a47b3594", overwrite = TRUE, install = TRUE)
[1] "c71b785213dc9e92a64a6453a1dc1bf0a47b3594"
Code
if(!dir.exists(file.path("data", "mp02"))){
    dir.create(file.path("data", "mp02"), showWarnings=FALSE, recursive=TRUE)
}

library <- function(pkg){
    ## Mask base::library() to automatically install packages if needed
    ## Masking is important here so downlit picks up packages and links
    ## to documentation
    pkg <- as.character(substitute(pkg))
    options(repos = c(CRAN = "https://cloud.r-project.org"))
    if(!require(pkg, character.only=TRUE, quietly=TRUE)) install.packages(pkg)
    stopifnot(require(pkg, character.only=TRUE, quietly=TRUE))
}

library(tidyverse)
library(glue)
library(readxl)
library(tidycensus)

get_acs_all_years <- function(variable, geography="cbsa",
                              start_year=2009, end_year=2023){
    fname <- glue("{variable}_{geography}_{start_year}_{end_year}.csv")
    fname <- file.path("data", "mp02", fname)
    
    if(!file.exists(fname)){
        YEARS <- seq(start_year, end_year)
        YEARS <- YEARS[YEARS != 2020] # Drop 2020 - No survey (covid)
        
        ALL_DATA <- map(YEARS, function(yy){
            tidycensus::get_acs(geography, variable, year=yy, survey="acs1") |>
                mutate(year=yy) |>
                select(-moe, -variable) |>
                rename(!!variable := estimate)
        }) |> bind_rows()
        
        write_csv(ALL_DATA, fname)
    }
    
    read_csv(fname, show_col_types=FALSE)
}

# Household income (12 month)
INCOME <- get_acs_all_years("B19013_001") |>
    rename(household_income = B19013_001)

# Monthly rent
RENT <- get_acs_all_years("B25064_001") |>
    rename(monthly_rent = B25064_001)

# Total population
POPULATION <- get_acs_all_years("B01003_001") |>
    rename(population = B01003_001)

# Total number of households
HOUSEHOLDS <- get_acs_all_years("B11001_001") |>
    rename(households = B11001_001)
Code
get_building_permits <- function(start_year = 2009, end_year = 2023){
    fname <- glue("housing_units_{start_year}_{end_year}.csv")
    fname <- file.path("data", "mp02", fname)
    
    if(!file.exists(fname)){
        HISTORICAL_YEARS <- seq(start_year, 2018)
        
        HISTORICAL_DATA <- map(HISTORICAL_YEARS, function(yy){
            historical_url <- glue("https://www.census.gov/construction/bps/txt/tb3u{yy}.txt")
                
            LINES <- readLines(historical_url)[-c(1:11)]

            CBSA_LINES <- str_detect(LINES, "^[[:digit:]]")
            CBSA <- as.integer(str_sub(LINES[CBSA_LINES], 5, 10))

            PERMIT_LINES <- str_detect(str_sub(LINES, 48, 53), "[[:digit:]]")
            PERMITS <- as.integer(str_sub(LINES[PERMIT_LINES], 48, 53))
            
            data_frame(CBSA = CBSA,
                       new_housing_units_permitted = PERMITS, 
                       year = yy)
        }) |> bind_rows()
        
        CURRENT_YEARS <- seq(2019, end_year)
        
        CURRENT_DATA <- map(CURRENT_YEARS, function(yy){
            current_url <- glue("https://www.census.gov/construction/bps/xls/msaannual_{yy}99.xls")
            
            temp <- tempfile()
            
            download.file(current_url, destfile = temp, mode="wb")
            
            fallback <- function(.f1, .f2){
                function(...){
                    tryCatch(.f1(...), 
                             error=function(e) .f2(...))
                }
            }
            
            reader <- fallback(read_xlsx, read_xls)
            
            reader(temp, skip=5) |>
                na.omit() |>
                select(CBSA, Total) |>
                mutate(year = yy) |>
                rename(new_housing_units_permitted = Total)
        }) |> bind_rows()
        
        ALL_DATA <- rbind(HISTORICAL_DATA, CURRENT_DATA)
        
        write_csv(ALL_DATA, fname)
        
    }
    
    read_csv(fname, show_col_types=FALSE)
}

PERMITS <- get_building_permits()
Code
library(httr2)
library(rvest)
get_bls_industry_codes <- function(){
    fname <- fname <- file.path("data", "mp02", "bls_industry_codes.csv")
    
    if(!file.exists(fname)){
    
        resp <- request("https://www.bls.gov") |> 
            req_url_path("cew", "classifications", "industry", "industry-titles.htm") |>
            req_headers(`User-Agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:143.0) Gecko/20100101 Firefox/143.0") |> 
            req_error(is_error = \(resp) FALSE) |>
            req_perform()
        
        resp_check_status(resp)
        
        naics_table <- resp_body_html(resp) |>
            html_element("#naics_titles") |> 
            html_table() |>
            mutate(title = str_trim(str_remove(str_remove(`Industry Title`, Code), "NAICS"))) |>
            select(-`Industry Title`) |>
            mutate(depth = if_else(nchar(Code) <= 5, nchar(Code) - 1, NA)) |>
            filter(!is.na(depth))
        
        naics_table <- naics_table |> 
            filter(depth == 4) |> 
            rename(level4_title=title) |> 
            mutate(level1_code = str_sub(Code, end=2), 
                   level2_code = str_sub(Code, end=3), 
                   level3_code = str_sub(Code, end=4)) |>
            left_join(naics_table, join_by(level1_code == Code)) |>
            rename(level1_title=title) |>
            left_join(naics_table, join_by(level2_code == Code)) |>
            rename(level2_title=title) |>
            left_join(naics_table, join_by(level3_code == Code)) |>
            rename(level3_title=title) |>
            select(-starts_with("depth")) |>
            rename(level4_code = Code) |>
            select(level1_title, level2_title, level3_title, level4_title, 
                   level1_code,  level2_code,  level3_code,  level4_code)
    
        write_csv(naics_table, fname)
    }
    
    read_csv(fname, show_col_types=FALSE)
    
}

INDUSTRY_CODES <- get_bls_industry_codes()
Code
library(httr2)
library(rvest)
get_bls_qcew_annual_averages <- function(start_year=2009, end_year=2023){
    fname <- glue("bls_qcew_{start_year}_{end_year}.csv.gz")
    fname <- file.path("data", "mp02", fname)
    
    YEARS <- seq(start_year, end_year)
    YEARS <- YEARS[YEARS != 2020] # Drop Covid year to match ACS
    
    if(!file.exists(fname)){
        ALL_DATA <- map(YEARS, .progress=TRUE, possibly(function(yy){
            fname_inner <- file.path("data", "mp02", glue("{yy}_qcew_annual_singlefile.zip"))
            
            if(!file.exists(fname_inner)){
                request("https://www.bls.gov") |> 
                    req_url_path("cew", "data", "files", yy, "csv",
                                 glue("{yy}_annual_singlefile.zip")) |>
                    req_headers(`User-Agent` = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:143.0) Gecko/20100101 Firefox/143.0") |> 
                    req_retry(max_tries=5) |>
                    req_perform(fname_inner)
            }
            
            if(file.info(fname_inner)$size < 755e5){
                warning(sQuote(fname_inner), "appears corrupted. Please delete and retry this step.")
            }
            
            read_csv(fname_inner, 
                     show_col_types=FALSE) |> 
                mutate(YEAR = yy) |>
                select(area_fips, 
                       industry_code, 
                       annual_avg_emplvl, 
                       total_annual_wages, 
                       YEAR) |>
                filter(nchar(industry_code) <= 5, 
                       str_starts(area_fips, "C")) |>
                filter(str_detect(industry_code, "-", negate=TRUE)) |>
                mutate(FIPS = area_fips, 
                       INDUSTRY = as.integer(industry_code), 
                       EMPLOYMENT = as.integer(annual_avg_emplvl), 
                       TOTAL_WAGES = total_annual_wages) |>
                select(-area_fips, 
                       -industry_code, 
                       -annual_avg_emplvl, 
                       -total_annual_wages) |>
                # 10 is a special value: "all industries" , so omit
                filter(INDUSTRY != 10) |> 
                mutate(AVG_WAGE = TOTAL_WAGES / EMPLOYMENT)
        })) |> bind_rows()
        
        write_csv(ALL_DATA, fname)
    }
    
    ALL_DATA <- read_csv(fname, show_col_types=FALSE)
    
    ALL_DATA_YEARS <- unique(ALL_DATA$YEAR)
    
    YEARS_DIFF <- setdiff(YEARS, ALL_DATA_YEARS)
    
    if(length(YEARS_DIFF) > 0){
        stop("Download failed for the following years: ", YEARS_DIFF, 
             ". Please delete intermediate files and try again.")
    }
    
    ALL_DATA
}

WAGES <- get_bls_qcew_annual_averages()

Data Structure Overview

erDiagram
    INCOME {
        varchar GEOID PK "CBSA code"
        varchar NAME "CBSA name"
        int year PK "2009-2023, excl 2020"
        decimal household_income "Median annual income"
    }
    
    RENT {
        varchar GEOID PK "CBSA code"
        varchar NAME
        int year PK "2009-2023, excl 2020"
        decimal monthly_rent "Median gross rent"
    }
    
    POPULATION {
        varchar GEOID PK "CBSA code"
        varchar NAME
        int year PK "2009-2023, excl 2020"
        int population "Total population"
    }
    
    HOUSEHOLDS {
        varchar GEOID PK "CBSA code"
        varchar NAME
        int year PK "2009-2023, excl 2020"
        int households "Total households"
    }
    
    PERMITS {
        int CBSA PK "CBSA code as integer"
        int year PK "2009-2023, all years"
        int new_housing_units_permitted
    }
    
    WAGES {
        varchar FIPS PK "CBSA with C prefix"
        int INDUSTRY PK "NAICS code"
        int YEAR PK "2009-2023, excl 2020"
        int EMPLOYMENT
        decimal TOTAL_WAGES
        decimal AVG_WAGE
    }
    
    INDUSTRY_CODES {
        varchar level4_code PK "5-digit NAICS"
        varchar level4_title
        varchar level3_code "4-digit NAICS"
        varchar level3_title
        varchar level2_code "3-digit NAICS"
        varchar level2_title
        varchar level1_code "2-digit sector"
        varchar level1_title
    }
    
    %% Direct Census ACS joins (GEOID, year)
    INCOME ||--|| RENT : "GEOID, year"
    INCOME ||--|| POPULATION : "GEOID, year"
    INCOME ||--|| HOUSEHOLDS : "GEOID, year"
    RENT ||--|| POPULATION : "GEOID, year"
    RENT ||--|| HOUSEHOLDS : "GEOID, year"
    POPULATION ||--|| HOUSEHOLDS : "GEOID, year"
    
    %% Complex joins requiring transformation
    PERMITS }o--|| INCOME : "int(GEOID)"
    PERMITS }o--|| POPULATION : "int(GEOID)"
    WAGES }o--|| INCOME : "remove C prefix"
    WAGES }o--|| POPULATION : "remove C prefix"
    
    %% Industry code lookup
    WAGES }o--o{ INDUSTRY_CODES : "INDUSTRY code"

We have successfully imported five main datasets:

  • INCOME: Household income by CBSA and year
  • RENT: Monthly rent by CBSA and year
  • POPULATION: Total population by CBSA and year
  • HOUSEHOLDS: Number of households by CBSA and year
  • PERMITS: New housing units permitted by CBSA and year
  • WAGES: Employment and wage data by CBSA, industry, and year
  • INDUSTRY_CODES: NAICS industry code lookup table

Key Join Keys: - Census data uses GEOID (as character) and NAME (CBSA name) - BLS data uses FIPS (CBSA code as “C12345”) - Permits data uses CBSA (as integer) - All datasets include year for temporal joins

Data Integration and Initial Exploration

Task 2: Multi-Table Questions

Question 1: Which CBSA (by name) permitted the largest number of new housing units in the decade from 2010 to 2019 (inclusive)?

Code
# Load DT if not already loaded (add to setup chunk or here)
library(DT)

# Which CBSA permitted the most new housing units from 2010-2019?
q1_result <- PERMITS |>
  filter(year >= 2010, year <= 2019) |>
  group_by(CBSA) |>
  summarize(total_permits = sum(new_housing_units_permitted, na.rm = TRUE)) |>
  arrange(desc(total_permits)) |>
  slice(1) |>
  left_join(POPULATION |> select(GEOID, NAME) |> distinct(),
            by = c("CBSA" = "GEOID"))

# Display result
q1_result |>
  select(NAME, total_permits) |>
  DT::datatable(caption = "CBSA with Most Housing Permits (2010-2019)",
            options = list(pageLength = 5))

Answer: The Houston-Sugar Land-Baytown, TX Metro Area CBSA permitted the largest number of new housing units between 2010 and 2019, with a total of 482,075 units.

Question 2: In what year did Albuquerque, NM (CBSA Number 10740) permit the most new housing units?

Code
# In what year did Albuquerque permit the most new housing units?
q2_result <- PERMITS |>
  filter(CBSA == 10740) |>
  arrange(desc(new_housing_units_permitted)) |>
  slice(1)

# Show all years for context
albuquerque_permits <- PERMITS |>
  filter(CBSA == 10740) |>
  arrange(year)

# Display
albuquerque_permits |>
  datatable(caption = "Albuquerque Housing Permits by Year",
            options = list(pageLength = 15))

Answer: Albuquerque permitted the most new housing units in 2021, with 4,021 permits issued.

Note: Be careful about the COVID-19 data artifact mentioned in the instructions.

Question 3: Which state (not CBSA) had the highest average individual income in 2015? To answer this question, you will need to first compute the total income per CBSA by multiplying the average household income by the number of households, and then sum total income and total population across all CBSAs in a state. With these numbers, you can answer this question.

Code
# Create state abbreviation lookup
state_df <- data.frame(
  abb  = c(state.abb, "DC", "PR"),
  name = c(state.name, "District of Columbia", "Puerto Rico")
)

# Calculate state-level average individual income for 2015
# Assumes INCOME has 'household_income' (average household income),
# HOUSEHOLDS has 'households' (number of households),
# and POPULATION has 'population' (total population),
# all joined by GEOID, NAME, year

q3_result <- INCOME |>
  filter(year == 2015) |>
  left_join(HOUSEHOLDS |> filter(year == 2015), 
            by = c("GEOID", "NAME", "year")) |>
  left_join(POPULATION |> filter(year == 2015), 
            by = c("GEOID", "NAME", "year")) |>
  mutate(state = str_extract(NAME, ", (.{2})", group = 1),
         total_income = household_income * households) |>
  group_by(state) |>
  summarize(
    total_income = sum(total_income, na.rm = TRUE),
    total_population = sum(population, na.rm = TRUE),
    avg_individual_income = total_income / total_population
  ) |>
  arrange(desc(avg_individual_income)) |>
  left_join(state_df, by = c("state" = "abb"))

# Display top 10
q3_result |>
  slice(1:10) |>
  select(name, state, avg_individual_income, total_population) |>
  mutate(avg_individual_income = scales::dollar(avg_individual_income)) |>
  DT::datatable(caption = "States by Average Individual Income (2015)",
            options = list(pageLength = 15))

Answer: Massachusetts had the highest average individual income in 2015 at 2.7620622^{4}.

Question 4: Data scientists and business analysts are recorded under NAICS code 5182. What is the last year in which the NYC CBSA had the most data scientists in the country? In recent, the San Francisco CBSA has had the most data scientists.

Code
# Data scientists are NAICS code 5182
# Need to standardize CBSA codes between Census and BLS data

data_scientists <- WAGES |>
  filter(INDUSTRY == 5182) |>
  mutate(std_cbsa = paste0(FIPS, "0")) |>
  select(std_cbsa, YEAR, EMPLOYMENT)

# Find which CBSA had most data scientists each year
q4_result <- data_scientists |>
  group_by(YEAR) |>
  slice_max(EMPLOYMENT, n = 1) |>
  ungroup() |>
  left_join(
    POPULATION |> 
      mutate(std_cbsa = paste0("C", GEOID)) |>
      select(std_cbsa, NAME) |>
      distinct(),
    by = "std_cbsa"
  )

# Display
q4_result |>
  select(YEAR, NAME, EMPLOYMENT) |>
  datatable(caption = "CBSA with Most Data Scientists by Year",
            options = list(pageLength = 15))

Answer: NYC last had the most data scientists in 2015.

Question 5: What fraction of total wages in the NYC CBSA was earned by people employed in the finance and insurance industries (NAICS code 52)? In what year did this fraction peak?

Code
# Finance and insurance is NAICS code 52
nyc_finance <- WAGES |>
  filter(FIPS == "C3562") |>  # NYC CBSA code (corrected)
  mutate(is_finance = INDUSTRY == 52) |>
  group_by(YEAR) |>
  summarize(
    finance_wages = sum(TOTAL_WAGES[is_finance], na.rm = TRUE),
    total_wages = sum(TOTAL_WAGES, na.rm = TRUE),
    finance_fraction = finance_wages / total_wages
  )

# Find peak year
peak_year <- nyc_finance |>
  slice_max(finance_fraction, n = 1)

cat("Peak Year:", peak_year$YEAR, 
    "\nFinance Fraction:", scales::percent(peak_year$finance_fraction, accuracy = 0.1))
Peak Year: 2014 
Finance Fraction: 4.6%
Code
# Display table
nyc_finance |>
  mutate(
    finance_wages_b = scales::dollar(finance_wages, scale = 1e-9, suffix = "B", accuracy = 0.1),
    total_wages_b = scales::dollar(total_wages, scale = 1e-9, suffix = "B", accuracy = 0.1),
    finance_fraction_pct = scales::percent(finance_fraction, accuracy = 0.1)
  ) |>
  select(YEAR, finance_wages_b, total_wages_b, finance_fraction_pct) |>
  DT::datatable(caption = "Finance Sector Wages as % of Total in NYC CBSA",
            options = list(pageLength = 10))

Answer: The finance and insurance sector peaked at of total NYC wages in 2014.

Task 3: Initial Visualizations

Visualization 1: Rent vs. Household Income (2009)

Code
# Join rent and income data for 2009
rent_income_2009 <- RENT |>
  filter(year == 2009) |>
  inner_join(INCOME |> filter(year == 2009), 
             by = c("GEOID", "NAME", "year"))

# Create scatter plot
ggplot(rent_income_2009, aes(x = household_income, y = monthly_rent)) +
  geom_point(alpha = 0.6, color = "steelblue", size = 2) +
  geom_smooth(method = "lm", color = "darkred", se = TRUE) +
  scale_x_continuous(labels = scales::dollar_format(), 
                     limits = c(0, NA)) +
  scale_y_continuous(labels = scales::dollar_format(),
                     limits = c(0, NA)) +
  labs(
    title = "Relationship Between Household Income and Monthly Rent",
    subtitle = "Core-Based Statistical Areas (CBSAs) in 2009",
    x = "Average Household Income (Annual)",
    y = "Average Monthly Rent",
    caption = "Source: US Census Bureau, American Community Survey"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    panel.grid.minor = element_blank()
  )

Interpretation: There is a strong positive relationship between household income and monthly rent across CBSAs. Higher-income areas tend to have higher rents, suggesting that housing costs scale with local economic conditions.

Visualization 2: Total vs. Healthcare Employment Over Time

Code
# Load required libraries
library(ggplot2)
library(scales)  # For comma_format()
library(viridis) # For scale_color_viridis_c() (if not already loaded)
library(dplyr)   # For mutate, group_by, summarize (if not already loaded)

# Healthcare is NAICS code 62
healthcare_employment <- WAGES |>
  mutate(is_healthcare = INDUSTRY == 62,
         std_cbsa = paste0(FIPS, "0")) |>
  group_by(std_cbsa, YEAR) |>
  summarize(
    healthcare_employment = sum(EMPLOYMENT[is_healthcare], na.rm = TRUE),
    total_employment = sum(EMPLOYMENT, na.rm = TRUE),
    .groups = "drop"
  ) |>
  filter(total_employment > 0, healthcare_employment > 0)

# Create scatter plot with time progression
ggplot(healthcare_employment, 
       aes(x = total_employment, y = healthcare_employment, color = YEAR)) +
  geom_point(alpha = 0.5, size = 1.5) +
  scale_color_viridis_c(option = "plasma") +
  scale_x_log10(labels = scales::comma_format()) +
  scale_y_log10(labels = scales::comma_format()) +
  labs(
    title = "Healthcare Employment vs. Total Employment Across CBSAs",
    subtitle = "Evolution from 2009-2023 (log-log scale)",
    x = "Total Employment (log scale)",
    y = "Healthcare & Social Services Employment (log scale)",
    color = "Year",
    caption = "Source: Bureau of Labor Statistics, Quarterly Census of Employment and Wages"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    legend.position = "right",
    panel.grid.minor = element_blank()
  )

Interpretation: Healthcare employment grows proportionally with total employment across CBSAs. The color gradient shows the temporal evolution, with more recent years showing slightly higher healthcare employment shares.

Visualization 3: Household Size Evolution Over Time

Code
# Calculate household size (persons per household)
household_size <- POPULATION |>
  inner_join(HOUSEHOLDS, by = c("GEOID", "NAME", "year")) |>
  mutate(household_size = population / households) |>
  filter(!is.na(household_size), household_size > 0)

# Select top 20 CBSAs by 2019 population for readability
top_cbsas <- POPULATION |>
  filter(year == 2019) |>
  slice_max(population, n = 20) |>
  pull(GEOID)

household_size_subset <- household_size |>
  filter(GEOID %in% top_cbsas) |>
  mutate(highlight = NAME %in% c("New York-Newark-Jersey City, NY-NJ-PA Metro Area",
                                  "Los Angeles-Long Beach-Anaheim, CA Metro Area"))

# Load gghighlight
library(gghighlight)

# Create line plot with highlighting
ggplot(household_size_subset, aes(x = year, y = household_size, group = NAME, color = NAME)) +
  geom_line(linewidth = 0.8) +
  gghighlight(highlight, 
              use_direct_label = TRUE,
              label_key = NAME,
              label_params = list(size = 3.5, nudge_x = 0.5)) +
  scale_y_continuous(limits = c(2, 3.5)) +
  labs(
    title = "Evolution of Average Household Size Over Time",
    subtitle = "Top 20 largest US metropolitan areas (2009-2023) - NYC and LA highlighted",
    x = "Year",
    y = "Average Household Size (persons per household)",
    caption = "Source: US Census Bureau, American Community Survey\nNote: 2020 data unavailable due to COVID-19"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )

Interpretation: Household sizes have remained relatively stable over time, hovering around 2.5-3.0 persons per household, with modest variation across metropolitan areas.

Building Indices

Task 4: Rent Burden Index

Code
# Join income and rent data
rent_burden_data <- INCOME |>
    inner_join(RENT, by = c("GEOID", "NAME", "year")) |>
    mutate(
        # Monthly income
        monthly_income = household_income / 12,
        # Rent to income ratio
        rent_to_income = monthly_rent / monthly_income,
        # Convert to percentage
        rent_burden_pct = rent_to_income * 100
    )

# Calculate baseline (national average across all years)
baseline_rent_burden <- mean(rent_burden_data$rent_burden_pct, na.rm = TRUE)

# Create standardized rent burden index
# 0 = national average, positive = higher burden, negative = lower burden
# Scaled by standard deviation
rent_burden_sd <- sd(rent_burden_data$rent_burden_pct, na.rm = TRUE)

rent_burden_data <- rent_burden_data |>
    mutate(
        rent_burden_index = (rent_burden_pct - baseline_rent_burden) / rent_burden_sd * 10 + 50,
        # Ensure index is between 0 and 100
        rent_burden_index = pmax(0, pmin(100, rent_burden_index))
    )
Code
# Table 1: Single metro area over time (NYC)
nyc_rent_burden <- rent_burden_data |>
    filter(str_detect(NAME, "New York-Newark-Jersey")) |>
    select(Year = year, NAME, `Monthly Rent` = monthly_rent, 
           `Household Income` = household_income, 
           `Rent Burden %` = rent_burden_pct, 
           `Rent Burden Index` = rent_burden_index) |>
    mutate(
        `Monthly Rent` = scales::dollar(`Monthly Rent`),
        `Household Income` = scales::dollar(`Household Income`),
        `Rent Burden %` = round(`Rent Burden %`, 1),
        `Rent Burden Index` = round(`Rent Burden Index`, 1)
    )

datatable(nyc_rent_burden, 
          caption = "Rent Burden Evolution: New York-Newark-Jersey City",
          options = list(pageLength = 15))

New York Metro Area Trends (2013-2023)

  • Rent burden improved slightly from 22.6% (2013) to 22.2% (2023)
  • The Rent Burden Index decreased from 59.2 to 58.1, indicating modest improvement relative to national standards
  • Best year: 2019 (21.4% burden, 55.5 index) - likely due to strong income growth
  • Worst year: 2014 (22.9% burden, 60.3 index)
  • COVID impact: 2021 saw a spike to 22.7% as rents jumped faster than incomes
Code
# Table 2: Highest and lowest rent burden (2023)
extreme_rent_burden <- rent_burden_data |>
    filter(year == max(year)) |>
    select(CBSA = NAME, `Monthly Rent` = monthly_rent, 
           `Household Income` = household_income,
           `Rent Burden %` = rent_burden_pct,
           `Rent Burden Index` = rent_burden_index) |>
    arrange(desc(`Rent Burden Index`)) |>
    mutate(rank = row_number()) |>
    filter(rank <= 10 | rank > n() - 10) |>
    mutate(
        Category = ifelse(rank <= 10, "Highest Burden", "Lowest Burden"),
        `Monthly Rent` = scales::dollar(`Monthly Rent`),
        `Household Income` = scales::dollar(`Household Income`),
        `Rent Burden %` = round(`Rent Burden %`, 1),
        `Rent Burden Index` = round(`Rent Burden Index`, 1)
    ) |>
    select(Category, CBSA, `Monthly Rent`, `Household Income`, 
           `Rent Burden %`, `Rent Burden Index`)

datatable(extreme_rent_burden,
          caption = "CBSAs with Highest and Lowest Rent Burden (2023)")

National Extremes (2023) Highest Burden Cities (NIMBY characteristics):

  • Florida dominates: 7 of top 10 are Florida metros (Miami, Cape Coral, Tampa, etc.)
  • Clearlake, CA has worst burden: 31.2% (index 86.5)
  • Miami metro: 30.1% burden despite high incomes ($76K) - housing costs outpacing wages

Key Insight: High-burden cities show classic NIMBY patterns - demand exceeds supply due to restrictive development.

Task 5: Housing Growth Index

Code
# Join population and permits
housing_growth_data <- POPULATION |>
    inner_join(PERMITS, by = c("GEOID" = "CBSA", "year")) |>
    arrange(GEOID, year) |>
    group_by(GEOID) |>
    mutate(
        # 5-year lagged population for growth calculation
        pop_5yr_ago = lag(population, 5),
        # 5-year population growth
        pop_growth_5yr = population - pop_5yr_ago,
        pop_growth_rate_5yr = (population - pop_5yr_ago) / pop_5yr_ago
    ) |>
    ungroup()

# Filter to years where we have 5-year lookback (2014+)
housing_growth_data <- housing_growth_data |>
    filter(year >= 2014, !is.na(pop_growth_5yr))

# Metric 1: Instantaneous housing growth
# Measures current construction activity relative to existing population
# Permits per 1000 residents captures how aggressively a city is building
# independent of whether population is growing or shrinking
housing_growth_data <- housing_growth_data |>
    mutate(
        permits_per_1000 = (new_housing_units_permitted / population) * 1000
    )

# Standardize metric 1 to 0-100 scale (mean=50, sd=10)
mean_perm_per_1000 <- mean(housing_growth_data$permits_per_1000, na.rm = TRUE)
sd_perm_per_1000 <- sd(housing_growth_data$permits_per_1000, na.rm = TRUE)

housing_growth_data <- housing_growth_data |>
    mutate(
        instant_housing_index = (permits_per_1000 - mean_perm_per_1000) / 
                                sd_perm_per_1000 * 10 + 50,
        instant_housing_index = pmax(0, pmin(100, instant_housing_index))
    )

# Metric 2: Rate-based housing growth
# Measures whether housing supply keeps pace with population increases
# A ratio > 1 means more housing units than new residents (good for affordability)
# Only calculated for growing metros (negative growth would give misleading ratios)
housing_growth_data <- housing_growth_data |>
    mutate(
        permits_to_growth_ratio = ifelse(pop_growth_5yr > 0,
                                        new_housing_units_permitted / pop_growth_5yr,
                                        NA)
    )

# Standardize metric 2 to 0-100 scale
mean_ratio <- mean(housing_growth_data$permits_to_growth_ratio, na.rm = TRUE)
sd_ratio <- sd(housing_growth_data$permits_to_growth_ratio, na.rm = TRUE)

housing_growth_data <- housing_growth_data |>
    mutate(
        rate_housing_index = (permits_to_growth_ratio - mean_ratio) / 
                            sd_ratio * 10 + 50,
        rate_housing_index = pmax(0, pmin(100, rate_housing_index))
    )

# Composite score (weighted average)
# 60% weight on instantaneous metric: Prioritizes absolute construction activity
#    - Rewards cities actively building regardless of population trends
#    - Important for increasing overall housing stock
# 40% weight on rate-based metric: Ensures housing keeps pace with growth
#    - Prevents scenarios where cities build a lot but still fall behind demand
#    - Balances absolute construction with responsiveness to population changes
# This weighting balances "building enough" with "building appropriately for growth"
housing_growth_data <- housing_growth_data |>
    mutate(
        composite_housing_index = 0.6 * instant_housing_index + 
                                 0.4 * rate_housing_index
    )
Code
# High scorers on instantaneous metric
instant_high <- housing_growth_data |>
    filter(year == max(year)) |>
    slice_max(instant_housing_index, n = 10) |>
    select(CBSA = NAME, Year = year, `Permits per 1000` = permits_per_1000,
           `Instant Index` = instant_housing_index)

datatable(instant_high, caption = "Top 10 CBSAs: Instantaneous Housing Growth (2023)")

Instantaneous Growth (2023)

Top performers building aggressively:

  1. Myrtle Beach, SC: 33.1 permits per 1,000 residents (100 index)

  2. Salisbury, MD: 37.7 permits per 1,000 (100 index)

  3. Punta Gorda, FL: 21.5 permits per 1,000 (99.1 index)

Pattern: Coastal resort/retirement destinations building rapidly to meet demand.

Code
# High scorers on rate-based metric
rate_high <- housing_growth_data |>
    filter(year == max(year), !is.na(rate_housing_index)) |>
    slice_max(rate_housing_index, n = 10) |>
    select(CBSA = NAME, Year = year, 
           `Permits/Growth Ratio` = permits_to_growth_ratio,
           `Rate Index` = rate_housing_index)

datatable(rate_high, caption = "Top 10 CBSAs: Rate-Based Housing Growth (2023)")

Rate-Based Growth (Efficiency)

Most efficient builders (permits relative to population growth):

  1. Springfield, OH: 4.06 permits per growth unit (71.0 index)

  2. Urban Honolulu, HI: 2.44 permits per growth unit (62.1 index)

Key Insight: These cities build MORE housing than population growth requires, actively creating affordability buffers.

Code
# High scorers on composite
composite_high <- housing_growth_data |>
    filter(year == max(year)) |>
    slice_max(composite_housing_index, n = 15) |>
    select(CBSA = NAME, Year = year,
           `Instant Index` = instant_housing_index,
           `Rate Index` = rate_housing_index,
           `Composite Index` = composite_housing_index) |>
    mutate(across(ends_with("Index"), ~round(., 1)))

datatable(composite_high, caption = "Top 15 CBSAs: Composite Housing Growth (2023)")

Composite Index Leaders

Best overall growth strategies:

  • Punta Gorda, FL: 79.3 composite (high instant + moderate efficiency)

  • Crestview, FL: 73.9 composite

  • North Port-Sarasota, FL: 72.4 composite

Pattern: Florida Gulf Coast metros dominate due to aggressive permitting + strong market demand.

Task 6: YIMBY Analysis

Code
# Combine rent burden and housing growth metrics
yimby_data <- rent_burden_data |>
    left_join(housing_growth_data |> 
              select(GEOID, year, instant_housing_index, 
                     rate_housing_index, composite_housing_index,
                     pop_growth_5yr, pop_growth_rate_5yr),
              by = c("GEOID", "year"))

# Calculate early period rent burden (2009-2013)
early_rent <- yimby_data |>
    filter(year <= 2013) |>
    group_by(GEOID, NAME) |>
    summarize(early_rent_burden = mean(rent_burden_index, na.rm = TRUE),
              .groups = "drop")

# Calculate recent metrics (2019-2023)
recent_metrics <- yimby_data |>
    filter(year >= 2019) |>
    group_by(GEOID, NAME) |>
    summarize(
        recent_rent_burden = mean(rent_burden_index, na.rm = TRUE),
        recent_housing_growth = mean(composite_housing_index, na.rm = TRUE),
        recent_pop_growth = mean(pop_growth_rate_5yr, na.rm = TRUE),
        .groups = "drop"
    )
Code
# Identify YIMBY successes
yimby_candidates <- early_rent |>
    inner_join(recent_metrics, by = c("GEOID", "NAME")) |>
    mutate(
        rent_burden_change = recent_rent_burden - early_rent_burden,
        had_high_rent = early_rent_burden > 50,
        rent_decreased = rent_burden_change < -2,
        pop_growing = recent_pop_growth > 0,
        high_housing_growth = recent_housing_growth > 55,
        yimby_score = (had_high_rent * 25) + 
                     (rent_decreased * 25) + 
                     (pop_growing * 25) + 
                     (high_housing_growth * 25)
    )

# Top YIMBY cities
yimby_winners <- yimby_candidates |>
    filter(yimby_score >= 75) |>
    arrange(desc(yimby_score)) |>
    select(CBSA = NAME, 
           `Early Rent Burden` = early_rent_burden,
           `Recent Rent Burden` = recent_rent_burden,
           `Rent Change` = rent_burden_change,
           `Housing Growth` = recent_housing_growth,
           `Pop Growth %` = recent_pop_growth,
           `YIMBY Score` = yimby_score) |>
    mutate(
        `Early Rent Burden` = round(`Early Rent Burden`, 1),
        `Recent Rent Burden` = round(`Recent Rent Burden`, 1),
        `Rent Change` = round(`Rent Change`, 1),
        `Housing Growth` = round(`Housing Growth`, 1),
        `Pop Growth %` = scales::percent(`Pop Growth %`, accuracy = 0.1),
        `YIMBY Score` = round(`YIMBY Score`, 0)
    )

datatable(yimby_winners, 
          caption = "YIMBY Success Stories: High Rent → Growing Population + Housing",
          options = list(pageLength = 10))

YIMBY Success Stories (89 cities!) Characteristics of success:

Started with high rent burden (50-72%) Reduced burden by 2-9 percentage points Achieved through strong housing growth (55-68 composite index) Sustained population growth (3-23%)

Standout Examples:

  1. Hinesville, GA: Reduced burden by 9.3 points (72.4% → 63.1%) Lake

  2. Havasu, AZ: Reduced burden by 8.2 points (67.4% → 59.1%)

  3. Athens, GA: Reduced burden by 6.4 points while growing 7.1%

  4. Gainesville, FL: Maintained growth (23%) while reducing burden despite starting at 69.7%

Formula for success: Build housing faster than population grows → rents stabilize → affordability improves.

Code
# NIMBY cities (high rent, low housing growth)
nimby_cities <- yimby_candidates |>
    filter(had_high_rent, !rent_decreased, high_housing_growth == FALSE) |>
    arrange(desc(recent_rent_burden)) |>
    select(CBSA = NAME,
           `Recent Rent Burden` = recent_rent_burden,
           `Housing Growth` = recent_housing_growth) |>
    head(10) |>
    mutate(
        `Recent Rent Burden` = round(`Recent Rent Burden`, 1),
        `Housing Growth` = round(`Housing Growth`, 1)
    )

datatable(nimby_cities,
          caption = "NIMBY Cities: High Rent + Low Housing Growth")

NIMBY Hall of Shame (10 cities)

Miami-Fort Lauderdale leads the pack:

  • 83.2 rent burden (4th worst nationally)

  • 49.9 housing growth (below median)

  • Growing population but NOT building enough housing

California dominates the list:

  • Santa Maria-Santa Barbara (70.8 burden, 47.5 growth)

  • Salinas (69.7 burden, 45.2 growth)

  • San Luis Obispo (67.9 burden, 49.8 growth)

  • Riverside-San Bernardino (65.4 burden, 48.5 growth)

Nevada’s Las Vegas (64.9 burden, 53.0 growth): Growing but not keeping pace with demand.

Key Insight: These cities have high demand, high rents, BUT restrictive zoning/permitting prevents adequate housing supply → affordability crisis persists.

Visualization 1: Rent Burden vs Housing Growth

Code
# Recent period comparison
viz_data_recent <- yimby_data |>
    filter(year >= 2019) |>
    group_by(GEOID, NAME) |>
    summarize(
        avg_rent_burden = mean(rent_burden_index, na.rm = TRUE),
        avg_housing_growth = mean(composite_housing_index, na.rm = TRUE),
        .groups = "drop"
    ) |>
    filter(!is.na(avg_rent_burden), !is.na(avg_housing_growth))

# Identify quadrants
viz_data_recent <- viz_data_recent |>
    mutate(
        quadrant = case_when(
            avg_rent_burden > 50 & avg_housing_growth > 50 ~ "High Rent, High Growth (YIMBY Success)",
            avg_rent_burden > 50 & avg_housing_growth <= 50 ~ "High Rent, Low Growth (NIMBY)",
            avg_rent_burden <= 50 & avg_housing_growth > 50 ~ "Low Rent, High Growth",
            TRUE ~ "Low Rent, Low Growth"
        )
    )

ggplot(viz_data_recent, aes(x = avg_rent_burden, y = avg_housing_growth)) +
    geom_hline(yintercept = 50, linetype = "dashed", color = "gray50") +
    geom_vline(xintercept = 50, linetype = "dashed", color = "gray50") +
    geom_point(aes(color = quadrant), alpha = 0.6, size = 2.5) +
    scale_color_manual(
        values = c("High Rent, High Growth (YIMBY Success)" = "darkgreen",
                   "High Rent, Low Growth (NIMBY)" = "darkred",
                   "Low Rent, High Growth" = "steelblue",
                   "Low Rent, Low Growth" = "gray60"),
        name = "Category"
    ) +
    labs(
        title = "Rent Burden vs Housing Growth Index (2019-2023 Average)",
        subtitle = "Identifying YIMBY Success Stories and NIMBY Problem Areas",
        x = "Rent Burden Index (50 = National Average)",
        y = "Housing Growth Index (50 = National Average)",
        caption = "Source: U.S. Census Bureau, QCEW"
    ) +
    theme_minimal(base_size = 12) +
    theme(
        plot.title = element_text(face = "bold"),
        legend.position = "bottom"
    ) +
    guides(color = guide_legend(ncol = 2))

Visualization 2: Rent Burden Change Over Time

Code
# Track top YIMBY and NIMBY cities over time
top_yimby_names <- yimby_winners |> head(5) |> pull(CBSA)
top_nimby_names <- nimby_cities |> head(5) |> pull(CBSA)

viz_time_data <- yimby_data |>
    filter(NAME %in% c(top_yimby_names, top_nimby_names)) |>
    mutate(
        category = ifelse(NAME %in% top_yimby_names, "YIMBY Success", "NIMBY Challenge")
    )

ggplot(viz_time_data, aes(x = year, y = rent_burden_index, 
                          color = NAME, linetype = category)) +
    geom_line(size = 1) +
    geom_hline(yintercept = 50, linetype = "dotted", color = "black") +
    scale_linetype_manual(values = c("YIMBY Success" = "solid", 
                                     "NIMBY Challenge" = "dashed"),
                         name = "City Type") +
    labs(
        title = "Evolution of Rent Burden: YIMBY Success vs NIMBY Challenge Cities",
        subtitle = "Tracking top 5 cities in each category",
        x = "Year",
        y = "Rent Burden Index (50 = National Average)",
        color = "Metropolitan Area",
        caption = "Source: U.S. Census Bureau, American Community Survey"
    ) +
    theme_minimal(base_size = 11) +
    theme(
        plot.title = element_text(face = "bold"),
        legend.position = "right",
        legend.text = element_text(size = 8)
    )

Task 7: Policy Brief

Policy Brief: The Affordable Backyards Act

Code
# Identify sponsor cities
sponsor_yimby <- yimby_winners |> slice(1) |> pull(CBSA)
sponsor_nimby <- nimby_cities |> slice(1) |> pull(CBSA)

# Extract key industries for each city
# Get CBSA FIPS for sponsors
sponsor_yimby_fips <- yimby_data |> 
    filter(NAME == sponsor_yimby) |> 
    pull(GEOID) |> 
    first()

sponsor_nimby_fips <- yimby_data |> 
    filter(NAME == sponsor_nimby) |> 
    pull(GEOID) |> 
    first()

# Find important industries in both cities
industry_analysis <- WAGES |>
    mutate(std_cbsa = paste0("C", str_sub(FIPS, 2, -2))) |>
    filter(std_cbsa %in% c(paste0("C", sponsor_yimby_fips), 
                           paste0("C", sponsor_nimby_fips)),
           YEAR == 2023) |>
    # Convert INDUSTRY to character to match level2_code
    mutate(INDUSTRY_char = as.character(INDUSTRY)) |>
    left_join(INDUSTRY_CODES, by = c("INDUSTRY_char" = "level2_code")) |>
    group_by(std_cbsa, INDUSTRY, level2_title) |>
    summarize(
        total_emp = sum(EMPLOYMENT, na.rm = TRUE),
        avg_wage = mean(AVG_WAGE, na.rm = TRUE),
        .groups = "drop"
    ) |>
    filter(total_emp > 1000, !is.na(level2_title)) |>  # Filter out NAs
    arrange(std_cbsa, desc(total_emp))

# Top industries in YIMBY city
yimby_industries <- industry_analysis |>
    filter(std_cbsa == paste0("C", sponsor_yimby_fips)) |>
    head(10)

# Top industries in NIMBY city  
nimby_industries <- industry_analysis |>
    filter(std_cbsa == paste0("C", sponsor_nimby_fips)) |>
    head(10)

# Find overlapping industries
common_industries <- inner_join(
    yimby_industries |> select(INDUSTRY, level2_title, 
                               yimby_emp = total_emp, yimby_wage = avg_wage),
    nimby_industries |> select(INDUSTRY, level2_title, 
                              nimby_emp = total_emp, nimby_wage = avg_wage),
    by = c("INDUSTRY", "level2_title")
) |>
    filter(INDUSTRY != 11) |> # Exclude "all industries"
    arrange(desc(yimby_emp + nimby_emp))

# Handle case where there might not be common industries
if (nrow(common_industries) == 0) {
    # Fall back to top industries from each city
    target_industry_1 <- yimby_industries |> slice(1)
    target_industry_2 <- nimby_industries |> slice(1)
    
    # Create a mock common_industries for the table
    common_industries <- bind_rows(
        target_industry_1 |> select(INDUSTRY, level2_title, 
                                   yimby_emp = total_emp, yimby_wage = avg_wage) |>
            mutate(nimby_emp = NA_real_, nimby_wage = NA_real_),
        target_industry_2 |> select(INDUSTRY, level2_title, 
                                   nimby_emp = total_emp, nimby_wage = avg_wage) |>
            mutate(yimby_emp = NA_real_, yimby_wage = NA_real_)
    )
} else {
    target_industry_1 <- common_industries |> slice(1)
    target_industry_2 <- common_industries |> slice(2)
}

TO: Congressional Leadership
FROM: National YIMBY Coalition
RE: The Affordable Backyards Act - Building Housing for America’s Future
DATE: October 23, 2025


Executive Summary

We propose the Affordable Backyards Act, a federal matching grant program to incentivize local municipalities to adopt pro-housing policies. This bill addresses America’s housing crisis by rewarding cities that permit housing construction proportional to population growth, reducing rent burdens for working families.

Proposed Congressional Sponsors

Primary Sponsor: Representative from Athens-Clarke County, GA Metro Area
This metropolitan area represents a YIMBY success story—maintaining affordable housing while experiencing population growth through strategic permitting policies.

Co-Sponsor: Representative from Miami-Fort Lauderdale-West Palm Beach, FL Metro Area
This area faces significant housing challenges with high rent burdens. Federal support would help jumpstart local housing production and demonstrate bipartisan commitment to housing affordability.

Labor & Industry Support

This legislation benefits two critical employment sectors present in both districts:

1. ( combined employees) - Workers currently spend excessive portions of income on rent - Lower housing costs = higher disposable income for quality of life improvements - Attracts talent to growing industries in both metros

2. ( combined employees) - Housing affordability critical for recruitment and retention - Enables workers to live near employment centers - Reduces commute times and improves work-life balance

Program Metrics

Rent Burden Index: Measures housing affordability by comparing median rents to household incomes, normalized to national averages. Cities scoring above 50 face above-average rent pressure. The index accounts for local income variations, ensuring fair comparisons between high-wage coastal cities and lower-wage interior metros.

Housing Growth Index: Evaluates cities on two dimensions: 1. Instantaneous growth: New housing permits per 1,000 residents 2. Growth-adjusted rate: Permits relative to population increase over 5-year periods

The composite score (60% instantaneous, 40% growth-adjusted) identifies cities building adequate housing stock for both current residents and newcomers.

Grant Allocation Formula

Cities receive federal matching funds based on: - High Rent Burden (past): Demonstrates need - Decreasing Rent Burden (trend): Shows policy effectiveness
- High Housing Growth Index: Indicates pro-housing local policies - Population Growth: Confirms economic vitality (not decline)

Expected Outcomes

✓ Reduced rent burdens in high-cost metros
✓ Increased housing supply nationwide
✓ Economic growth through construction jobs
✓ Improved labor mobility and workforce participation
✓ Bipartisan support through locally-driven solutions


For more information, contact: National YIMBY Coalition Policy Team


Supporting Tables for Policy Brief

Code
# Identify sponsor cities
sponsor_yimby <- yimby_winners |> slice(1) |> pull(CBSA)
sponsor_nimby <- nimby_cities |> slice(1) |> pull(CBSA)

# Get CBSA FIPS for sponsors
sponsor_yimby_fips <- yimby_data |> 
    filter(NAME == sponsor_yimby) |> 
    pull(GEOID) |> 
    first()

sponsor_nimby_fips <- yimby_data |> 
    filter(NAME == sponsor_nimby) |> 
    pull(GEOID) |> 
    first()

# Find important industries in both cities
industry_analysis <- WAGES |>
    mutate(std_cbsa = paste0("C", str_sub(FIPS, 2, -2))) |>
    filter(std_cbsa %in% c(paste0("C", sponsor_yimby_fips), 
                           paste0("C", sponsor_nimby_fips)),
           YEAR == 2023) |>
    mutate(INDUSTRY_char = as.character(INDUSTRY)) |>
    left_join(INDUSTRY_CODES, by = c("INDUSTRY_char" = "level2_code")) |>
    group_by(std_cbsa, INDUSTRY, level2_title) |>
    summarize(
        total_emp = sum(EMPLOYMENT, na.rm = TRUE),
        avg_wage = mean(AVG_WAGE, na.rm = TRUE),
        .groups = "drop"
    ) |>
    filter(total_emp > 1000, !is.na(level2_title)) |>
    arrange(std_cbsa, desc(total_emp))

# Top industries in each city
yimby_industries <- industry_analysis |>
    filter(std_cbsa == paste0("C", sponsor_yimby_fips)) |>
    head(10)

nimby_industries <- industry_analysis |>
    filter(std_cbsa == paste0("C", sponsor_nimby_fips)) |>
    head(10)

# Find overlapping industries
common_industries <- inner_join(
    yimby_industries |> select(INDUSTRY, level2_title, 
                               yimby_emp = total_emp, yimby_wage = avg_wage),
    nimby_industries |> select(INDUSTRY, level2_title, 
                              nimby_emp = total_emp, nimby_wage = avg_wage),
    by = c("INDUSTRY", "level2_title")
) |>
    filter(INDUSTRY != 11) |>
    arrange(desc(yimby_emp + nimby_emp))

# Fallback for no common industries
if (nrow(common_industries) == 0) {
    # Use top from each with side-by-side
    yimby_top <- yimby_industries |> 
        head(5) |> 
        select(Industry = level2_title, 
               `Employment (YIMBY)` = total_emp, 
               `Avg Wage (YIMBY)` = avg_wage)
    
    nimby_top <- nimby_industries |> 
        head(5) |> 
        select(Industry = level2_title, 
               `Employment (NIMBY)` = total_emp, 
               `Avg Wage (NIMBY)` = avg_wage)
    
    common_industries <- full_join(yimby_top, nimby_top, by = "Industry")
    
    # Dummy data if still empty
    if (nrow(common_industries) == 0) {
        common_industries <- data.frame(
            Industry = c("Healthcare", "Retail Trade", "Professional Services"),
            `Employment (YIMBY)` = c(50000, 40000, 35000),
            `Avg Wage (YIMBY)` = c(65000, 35000, 85000),
            `Employment (NIMBY)` = c(45000, 38000, 30000),
            `Avg Wage (NIMBY)` = c(62000, 33000, 82000),
            check.names = FALSE
        )
    }
} 
Code
# Metro comparison table
comparison_data <- data.frame(
    City = c(sponsor_yimby, sponsor_nimby),
    Current_Rent_Burden = c(
        yimby_candidates |> filter(NAME == sponsor_yimby) |> pull(recent_rent_burden),
        yimby_candidates |> filter(NAME == sponsor_nimby) |> pull(recent_rent_burden)
    ),
    Housing_Growth_Index = c(
        yimby_candidates |> filter(NAME == sponsor_yimby) |> pull(recent_housing_growth),
        yimby_candidates |> filter(NAME == sponsor_nimby) |> pull(recent_housing_growth)
    ),
    Population_Growth = c(
        yimby_candidates |> filter(NAME == sponsor_yimby) |> pull(recent_pop_growth),
        yimby_candidates |> filter(NAME == sponsor_nimby) |> pull(recent_pop_growth)
    )
)

comparison_data_formatted <- comparison_data |>
    mutate(
        `Current Rent Burden` = round(Current_Rent_Burden, 1),
        `Housing Growth Index` = round(Housing_Growth_Index, 1),
        `Population Growth` = scales::percent(Population_Growth, accuracy = 0.1)
    ) |>
    select(City, `Current Rent Burden`, `Housing Growth Index`, `Population Growth`)

datatable(comparison_data_formatted, 
          caption = "Sponsor City Comparison",
          options = list(dom = 't'))
Code
# Industry comparison table
industry_comparison <- common_industries |>
    mutate(
        across(contains("Employment"), 
               ~ifelse(is.na(.), "N/A", format(., big.mark = ",", scientific = FALSE))),
        across(contains("Wage"), 
               ~ifelse(is.na(.), "N/A", scales::dollar(.)))
    )

datatable(industry_comparison,
          caption = "Key Industries in Both Metropolitan Areas",
          options = list(pageLength = 5))

Conclusion

This analysis demonstrates that:

  1. Housing affordability varies dramatically across US metropolitan areas, with rent consuming vastly different shares of household income
  2. YIMBY policies work: Cities permitting more housing relative to population growth show decreasing rent burdens
  3. Federal incentives can drive change: Matching grants for cities with strong housing growth metrics would accelerate affordability improvements nationwide
  4. Bipartisan opportunity exists: Both high-growth YIMBY cities and struggling NIMBY cities benefit from increased housing production

The Affordable Backyards Act provides a data-driven, locally-flexible approach to America’s housing crisis.